Source code for texar.torch.utils.utils_io

# Copyright 2019 The Texar Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Utility functions related to input/output.
"""
import os

__all__ = [
    "write_paired_text",
    "maybe_create_dir",
]


def write_paired_text(src, tgt, fname, append=False, mode='h', sep='\t',
                      src_fname_suffix='src', tgt_fname_suffix='tgt'):
    r"""Writes paired text to a file.

    Args:
        src: A list (or array) of ``str`` source text.
        tgt: A list (or array) of ``str`` target text.
        fname (str): The output filename.
        append (bool): Whether to append content to the end of the file if
            it exists. If `False`, an existing file is overwritten.
        mode (str): The mode of writing, with the following options:

            - **'h'**: The "horizontal" mode. Each source target pair is
              written in one line, intervened with :attr:`sep`, e.g.::

                  source_1 target_1
                  source_2 target_2

            - **'v'**: The "vertical" mode. Each source target pair is
              written in two consecutive lines, e.g.::

                  source_1
                  target_1
                  source_2
                  target_2

            - **'s'**: The "separate" mode. Each source target pair is
              written in corresponding lines of two files named as
              ``"{fname}.{src_fname_suffix}"`` and
              ``"{fname}.{tgt_fname_suffix}"``, respectively.

        sep (str): The string intervening between source and target. Used
            when :attr:`mode` is set to ``"h"``.
        src_fname_suffix (str): Used when :attr:`mode` is ``"s"``. The suffix
            to the source output filename. For example, with
            ``(fname='output', src_fname_suffix='src')``, the output source
            file is named as ``output.src``.
        tgt_fname_suffix (str): Used when :attr:`mode` is ``"s"``. The suffix
            to the target output filename.

    Returns:
        The filename(s). If :attr:`mode` is ``"h"`` or ``"v"``, returns
        :attr:`fname`. If :attr:`mode` is ``"s"``, returns a tuple of
        filenames ``("{fname}.{src_fname_suffix}",
        "{fname}.{tgt_fname_suffix}")``.

    Raises:
        ValueError: If :attr:`mode` is not one of ``"h"``, ``"v"`` or
            ``"s"``. The check happens before any file is opened, so an
            invalid mode never creates or truncates a file.
    """
    # Validate up front: opening the file first would truncate (or create)
    # it before the bad mode is detected, and an empty input would never
    # reach the check at all.
    if mode not in ('h', 'v', 's'):
        raise ValueError('Unknown mode: {}'.format(mode))

    fmode = 'a' if append else 'w'

    if mode == 's':
        fn_src = '{}.{}'.format(fname, src_fname_suffix)
        fn_tgt = '{}.{}'.format(fname, tgt_fname_suffix)
        # Each side is written to its own file, one item per line, always
        # terminated by a newline.
        for path, lines in ((fn_src, src), (fn_tgt, tgt)):
            with open(path, fmode, encoding='utf-8') as f:
                f.write('\n'.join(lines))
                f.write('\n')
        return fn_src, fn_tgt

    # 'h' joins each pair with `sep` on one line; 'v' puts the pair on two
    # consecutive lines. `zip` stops at the shorter of the two inputs.
    joiner = sep if mode == 'h' else '\n'
    with open(fname, fmode, encoding='utf-8') as f:
        f.writelines(
            '{}{}{}\n'.format(s, joiner, t) for s, t in zip(src, tgt))
    return fname
def maybe_create_dir(dirname: str) -> bool:
    r"""Creates directory if it does not exist.

    Missing intermediate directories are created as well.

    Args:
        dirname (str): Path to the directory.

    Returns:
        bool: Whether a new directory is created.

    Raises:
        FileExistsError: If :attr:`dirname` exists but is not a directory.
    """
    if os.path.isdir(dirname):
        return False
    try:
        os.makedirs(dirname)
    except FileExistsError:
        # Something appeared at `dirname` between the check above and the
        # creation attempt. If it is a directory (e.g. made by a concurrent
        # process), the goal is met and nothing new was created here;
        # otherwise the path is occupied by a non-directory, so propagate.
        if not os.path.isdir(dirname):
            raise
        return False
    return True