# Source code for pmotools.scripts.extractors_from_pmo.extract_pmo_with_selected_meta
#!/usr/bin/env python3
import argparse
import sys
from pmotools.pmo_engine.pmo_processor import PMOProcessor
from pmotools.pmo_engine.pmo_reader import PMOReader
from pmotools.pmo_engine.pmo_writer import PMOWriter
from pmotools.utils.small_utils import Utils
def get_parser_extract_pmo_with_selected_meta() -> argparse.ArgumentParser:
    """Build the command-line parser for ``extract_pmo_with_selected_meta``.

    Options defined:

    * ``--file`` (required): PMO file to read.
    * ``--output`` (required): output json file path.
    * ``--overwrite`` (flag): overwrite the output file if it exists.
    * ``--verbose`` (flag): write out messages about the extraction.
    * ``--metaFieldsValues`` (required): meta fields/values selection, either
      a table or a ``field1=value1,value2:field2=value1`` style string.

    Returns:
        The configured ``argparse.ArgumentParser``; no arguments are parsed
        here.
    """
    parser = argparse.ArgumentParser(
        prog="pmotools-python extract_pmo_with_selected_meta",
        description="Extract samples + haplotypes using selected meta",
    )
    parser.add_argument("--file", type=str, required=True, help="PMO file")
    parser.add_argument(
        "--output", type=str, required=True, help="Output json file path"
    )
    parser.add_argument(
        "--overwrite", action="store_true", help="If output file exists, overwrite it"
    )
    parser.add_argument(
        "--verbose",
        action="store_true",
        help="write out various messages about extraction",
    )
    parser.add_argument(
        "--metaFieldsValues",
        type=str,
        required=True,
        help="Meta Fields to include, should either be a table with columns field, values (and optionally group) or supplied command line as field1=value1,value2,value3:field2=value1,value2",
    )
    return parser
def parse_args_extract_pmo_with_selected_meta(argv=None) -> argparse.Namespace:
    """Parse command-line arguments for ``extract_pmo_with_selected_meta``.

    Args:
        argv: Optional sequence of argument strings to parse. When ``None``
            (the default) ``argparse`` reads ``sys.argv[1:]``, which is the
            behavior existing callers rely on.

    Returns:
        The parsed ``argparse.Namespace``.
    """
    parser = get_parser_extract_pmo_with_selected_meta()
    return parser.parse_args(argv)
def extract_pmo_with_selected_meta():
    """Command-line entry point: extract part of a PMO using selected meta.

    Parses the command line, checks the input/output paths, reads the PMO
    from ``--file``, extracts from it using ``--metaFieldsValues``, and writes
    the result to ``--output``. With ``--verbose``, the per-group counts
    returned by the extraction are written to stdout as tab-separated text.
    """
    args = parse_args_extract_pmo_with_selected_meta()

    # check files
    Utils.inputOutputFileCheck(args.file, args.output, args.overwrite)

    # read in pmo
    pmo = PMOReader.read_in_pmo(args.file)

    # extract out of PMO
    pmo_out, group_counts = PMOProcessor.extract_from_pmo_samples_with_meta_groupings(
        pmo, args.metaFieldsValues
    )

    # write out the extracted
    # The second argument is true when either the input or the requested
    # output name ends in ".gz" -- presumably this asks for a gzipped output
    # name; confirm against PMOWriter.add_pmo_extension_as_needed.
    args.output = PMOWriter.add_pmo_extension_as_needed(
        args.output, args.file.endswith(".gz") or args.output.endswith(".gz")
    )
    PMOWriter.write_out_pmo(pmo_out, args.output, args.overwrite)

    if args.verbose:
        sys.stdout.write(
            "Extracted the following number of specimens per group:" + "\n"
        )
        # NOTE(review): group_counts looks like a pandas object (it exposes
        # to_csv) -- confirm against PMOProcessor.
        group_counts.to_csv(sys.stdout, sep="\t", index=True)
# Run the extraction only when this file is executed as a script.
if __name__ == "__main__":
    extract_pmo_with_selected_meta()