Source code for pmotools.scripts.extract_info_from_pmo.count_specimen_meta
#!/usr/bin/env python3
import argparse
import sys
from pmotools.pmo_engine.pmo_processor import PMOProcessor
from pmotools.pmo_engine.pmo_reader import PMOReader
from pmotools.utils.small_utils import Utils
def get_parser_count_specimen_meta() -> argparse.ArgumentParser:
    """Build the command-line parser for the ``count_specimen_meta`` tool.

    Options registered, in order:
        --file         required; the PMO file.
        --output       output file, defaults to ``"STDOUT"``.
        --delim        output delimiter, defaults to ``"tab"``.
        --overwrite    flag; overwrite the output file if it exists.
        --meta_fields  required; comma-separated fields to count.

    Returns:
        The configured ``argparse.ArgumentParser``.
    """
    cli = argparse.ArgumentParser(
        prog="pmotools-python count_specimen_meta",
        description="Count values of selected specimen meta fields",
    )

    # (flag, add_argument keyword arguments), kept in the order they should
    # appear in the generated help text.
    option_specs = (
        ("--file", {"type": str, "required": True, "help": "PMO file"}),
        (
            "--output",
            {
                "type": str,
                "default": "STDOUT",
                "required": False,
                "help": "output file",
            },
        ),
        (
            "--delim",
            {
                "default": "tab",
                "type": str,
                "required": False,
                "help": "the delimiter of the output text file, examples input tab,comma but can also be the actual delimiter",
            },
        ),
        (
            "--overwrite",
            {
                "action": "store_true",
                "help": "If output file exists, overwrite it",
            },
        ),
        (
            "--meta_fields",
            {
                "type": str,
                "required": True,
                "help": "the fields to count the subfields of, can supply multiple separated by commas, e.g. --meta_fields collection_country,collection_date",
            },
        ),
    )
    for flag, spec in option_specs:
        cli.add_argument(flag, **spec)

    return cli
def parse_args_count_specimen_meta():
    """Parse the command line for the ``count_specimen_meta`` tool.

    Builds the parser with ``get_parser_count_specimen_meta()`` and calls
    ``parse_args()`` with no explicit argument list, so argparse reads the
    arguments from ``sys.argv``.

    Returns:
        The namespace of parsed arguments.
    """
    return get_parser_count_specimen_meta().parse_args()
def count_specimen_meta():
    """Count specimens per value of the selected meta fields and write a table.

    Command-line entry point. Steps:

    1. Parse the command line.
    2. Resolve the output delimiter and file extension from ``--delim``
       (the gzip flag is set when ``--output`` ends with ``".gz"``).
    3. Unless writing to ``"STDOUT"``, pass the output name through
       ``Utils.appendStrAsNeeded`` together with the resolved extension.
    4. Run ``Utils.inputOutputFileCheck`` on the input/output paths and the
       ``--overwrite`` flag.
    5. Read the PMO, count specimens by the requested fields, and write the
       result with ``to_csv`` to standard output or the output file.

    Field names given to ``--meta_fields`` are split on commas and stripped
    of surrounding whitespace, so ``"a, b"`` is treated the same as ``"a,b"``.
    """
    args = parse_args_count_specimen_meta()

    # "STDOUT" is the sentinel output name meaning "write to standard output".
    writing_to_stdout = "STDOUT" == args.output

    # check files
    output_delim, output_extension = Utils.process_delimiter_and_output_extension(
        args.delim, gzip=args.output.endswith(".gz")
    )
    if not writing_to_stdout:
        args.output = Utils.appendStrAsNeeded(args.output, output_extension)
    Utils.inputOutputFileCheck(args.file, args.output, args.overwrite)

    # process the meta_fields argument: strip whitespace around each name so a
    # space after a comma does not become part of the field name. Empty tokens
    # are deliberately kept so malformed input is still seen downstream.
    meta_fields_toks = [field.strip() for field in args.meta_fields.split(",")]

    # read in PMO
    pmo = PMOReader.read_in_pmo(args.file)

    # count sub-fields
    # NOTE(review): counts_df is presumably a pandas DataFrame (it is written
    # with to_csv below) -- confirm against PMOProcessor.
    counts_df = PMOProcessor.count_specimen_by_field_value(pmo, meta_fields_toks)

    # write out
    counts_df.to_csv(
        sys.stdout if writing_to_stdout else args.output,
        sep=output_delim,
        index=False,
    )
# Run the command-line tool only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    count_specimen_meta()