Python Forum
program wanted for Posix
Thread Rating:
  • 0 Vote(s) - 0 Average
  • 1
  • 2
  • 3
  • 4
  • 5
program wanted for Posix
#1
Maybe you know the "head" command. Maybe you know the "tail" command. I'd like to see someone create a "headtail" command that outputs the head of a file followed by the tail of that same file. If the file is so small that the head and tail would overlap or duplicate lines, then it should just output the whole file. Arguments should allow specifying how many lines to output for the head part and how many for the tail part. And, of course, it must be written in Python 3.
Tradition is peer pressure from dead people

What do you call someone who speaks three languages? Trilingual. Two languages? Bilingual. One language? American.
Reply
#2
You could use this head_tail() function to implement it
import itertools as itt
import more_itertools as moi
import unittest as ut


def head_tail(iterable, h, t):
    """Yield a 'head' iterator, then a 'tail' iterator, over one iterable.

    This is a generator that produces exactly two items and is meant
    to be unpacked as ``head, tail = head_tail(iterable, h, t)``:

    * 'head' produces at most the h first items of the iterable;
    * 'tail' produces at most the t last items among those that
      remain once 'head' is exhausted.

    Both iterators draw from the same underlying iterator, so the
    'head' iterator must be consumed before iterating on 'tail'
    (more_itertools.consume() can be used for that).

    A ValueError is raised when the generator is first advanced
    (for instance while unpacking) if h or t is negative.

    Example:

    >>> head, tail = head_tail('abcdefghi', 4, 3)
    >>> list(head)
    ['a', 'b', 'c', 'd']
    >>> list(tail)
    ['g', 'h', 'i']
    """
    if any(size < 0 for size in (h, t)):
        raise ValueError("nonnegative lengths expected")
    source = iter(iterable)

    # First item: at most h leading elements of the shared iterator.
    yield itt.islice(source, h)

    # Second item: the last t elements of whatever is left in the shared
    # iterator at the time the tail is iterated; nothing at all when t is 0.
    if t != 0:
        yield moi.islice_extended(source, -t, None)
    else:
        yield iter(())

class TestHeadTail(ut.TestCase):
    """Unit tests for head_tail()."""

    def _assert_head_tail(self, data, h, t, want_head, want_tail):
        # The head is materialised first because head_tail() requires the
        # head iterator to be consumed before the tail iterator is used.
        head, tail = head_tail(data, h, t)
        self.assertEqual(list(head), list(want_head))
        self.assertEqual(list(tail), list(want_tail))

    def test_large_data(self):
        # Input long enough that head and tail are both complete.
        self._assert_head_tail("abcdefghijkl", 4, 3, "abcd", "jkl")

    def test_tail_truncated(self):
        # Only two items remain after the head, so the tail is shortened.
        self._assert_head_tail("abcdefghi", 7, 4, "abcdefg", "hi")

    def test_zero_head(self):
        # An empty head leaves the whole input available to the tail.
        self._assert_head_tail("abcdefgh", 0, 4, "", "efgh")

    def test_zero_tail(self):
        # A zero-length tail yields nothing.
        self._assert_head_tail("abcdefgh", 4, 0, "abcd", "")


# Run the unit tests when this file is executed as a script.
if __name__ == "__main__":
    ut.main()
« We can solve any problem by introducing an extra level of indirection »
Reply
#3
Here is a complete script (for a single file argument). Assumes utf8 encoding of the target file
#!/usr/bin/env python
from argparse import ArgumentParser
import itertools as itt
import more_itertools as moi
import sys
import unittest as ut


def head_tail(iterable, h, t):
    """Split an iterable into a leading part and a trailing part.

    The function is a generator yielding two iterators, normally
    unpacked as ``head, tail = head_tail(iterable, h, t)``. The
    'head' iterator gives up to the h first items of the iterable.
    The 'tail' iterator gives up to the t last items of what is
    left in the iterable after 'head' has been exhausted.

    Because both iterators share the same underlying iterator,
    'head' has to be consumed before 'tail' is iterated; the
    function more_itertools.consume() can be used to do so.

    Negative values of h or t cause a ValueError, raised when the
    generator is first advanced (for instance while unpacking).

    Example:

    >>> head, tail = head_tail('abcdefghi', 4, 3)
    >>> list(head)
    ['a', 'b', 'c', 'd']
    >>> list(tail)
    ['g', 'h', 'i']
    """
    for length in (h, t):
        if length < 0:
            raise ValueError("nonnegative lengths expected")
    stream = iter(iterable)

    head = itt.islice(stream, h)
    yield head

    # A negative start index makes islice_extended keep only the last t
    # items of the stream; t == 0 is handled apart and gives an empty tail.
    tail = iter(()) if t == 0 else moi.islice_extended(stream, -t, None)
    yield tail



class TestHeadTail(ut.TestCase):
    """Unit tests for head_tail(); each test consumes the head first."""

    def test_large_data(self):
        # Plenty of items: both parts are complete and do not overlap.
        head, tail = head_tail("abcdefghijkl", 4, 3)
        self.assertEqual([*head], ["a", "b", "c", "d"])
        self.assertEqual([*tail], ["j", "k", "l"])

    def test_tail_truncated(self):
        # Fewer than 4 items remain after a head of 7.
        head, tail = head_tail("abcdefghi", 7, 4)
        self.assertEqual([*head], ["a", "b", "c", "d", "e", "f", "g"])
        self.assertEqual([*tail], ["h", "i"])

    def test_zero_head(self):
        # Nothing goes to the head; the tail is taken from the full input.
        head, tail = head_tail("abcdefgh", 0, 4)
        self.assertEqual([*head], [])
        self.assertEqual([*tail], ["e", "f", "g", "h"])

    def test_zero_tail(self):
        # A tail of length zero is empty.
        head, tail = head_tail("abcdefgh", 4, 0)
        self.assertEqual([*head], ["a", "b", "c", "d"])
        self.assertEqual([*tail], [])

def main():
    """Command-line entry point: print the head and the tail of one file.

    Parses the options -n/--headsize and -m/--tailsize (10 lines each by
    default) and a single FILE argument. It then writes to standard
    output a '(head)' banner followed by the first lines of the file,
    and a '(tail)' banner followed by the last of the remaining lines.

    The file is decoded as UTF-8 whatever the platform's locale is.
    Negative line counts are rejected as a usage error.
    """
    parser = ArgumentParser(description='Print head and tail of files')
    parser.add_argument('-n', '--headsize', type=int, default=10, dest='headsize', help='defaults to 10 lines')
    parser.add_argument('-m', '--tailsize', type=int, default=10, dest='tailsize', help='defaults to 10 lines')
    parser.add_argument('file', metavar='FILE')
    arg = parser.parse_args()
    # Report bad counts as a normal usage error rather than letting
    # head_tail() fail later with a ValueError traceback.
    if arg.headsize < 0 or arg.tailsize < 0:
        parser.error('line counts must be nonnegative')
    # Without an explicit encoding, open() uses the locale's preferred
    # encoding (e.g. a legacy code page on Windows) and UTF-8 files can fail
    # with UnicodeDecodeError. errors='replace' keeps a stray invalid byte
    # from aborting the whole listing.
    with open(arg.file, encoding='utf-8', errors='replace') as infile:
        print(f'==> (head) {arg.file} <==')
        head, tail = head_tail(infile, arg.headsize, arg.tailsize)
        # The head must be written out (and so consumed) before the tail
        # is iterated, because both read from the same file iterator.
        sys.stdout.writelines(head)
        print(f'==> (tail) {arg.file} <==')
        sys.stdout.writelines(tail)



# Run the command-line interface only when this file is executed as a script.
if __name__ == "__main__":
    main()
Output:
λ python paillasse/pf/headtail.py -h usage: headtail.py [-h] [-n HEADSIZE] [-m TAILSIZE] FILE Print head and tail of files positional arguments: FILE options: -h, --help show this help message and exit -n HEADSIZE, --headsize HEADSIZE defaults to 10 lines -m TAILSIZE, --tailsize TAILSIZE defaults to 10 lines λ
Example
Output:
λ python paillasse/pf/headtail.py paillasse/pf/headtail.py -n 5 -m 7 ==> (head) paillasse/pf/headtail.py <== from argparse import ArgumentParser import itertools as itt import more_itertools as moi import sys import unittest as ut ==> (tail) paillasse/pf/headtail.py <== print(f'==> (tail) {arg.file} <==') sys.stdout.writelines(tail) if __name__ == "__main__": main()
snippsat likes this post
« We can solve any problem by introducing an extra level of indirection »
Reply
#4
Skaperen you should show some effort when posting these requests 💤

(May-13-2024, 08:32 AM)Gribouillis Wrote: Here is a complete script (for a single file argument). Assumes utf8 encoding of the target file
Good. To make it more robust, pass an explicit encoding: as written, it sometimes fails on files containing Unicode characters on Windows.
Windows guesses the encoding badly:
Error:
UnicodeDecodeError: 'charmap' codec can't decode byte .....
with open(arg.file, encoding='utf-8', errors='ignore') as infile:
    print(f'==> (head) {arg.file} <==') 

Here is my take on this; I use Typer for the CLI application.
import typer
from collections import deque

# Typer application object; commands are registered on it with @app.command().
app = typer.Typer()

@app.command()
def headtail(filename: str, head: int = 5, tail: int = 5):
    """Print the first HEAD and the last TAIL lines of a text file.

    If the file is short enough that the two parts would touch or
    overlap, the whole file is printed once, without a separator.
    """
    # A bounded deque keeps only the most recent `tail` lines while the
    # file is read in a single pass.
    tail_deque = deque(maxlen=tail)
    head_list = []
    total_lines = 0
    try:
        # The file is decoded as UTF-8 and undecodable bytes are dropped.
        with open(filename, encoding='utf-8', errors='ignore') as file:
            for i, line in enumerate(file):
                if i < head:
                    head_list.append(line)
                tail_deque.append(line)
                total_lines += 1
        for line in head_list:
            print(line, end='')
        if total_lines <= head + tail:
            # Small file: print every line exactly once. The lines not
            # already shown as part of the head are the last `remaining`
            # entries of the deque (there are never more than `tail` of them).
            remaining = total_lines - len(head_list)
            kept = list(tail_deque)
            for line in kept[len(kept) - remaining:]:
                print(line, end='')
        else:
            if tail > 0:
                print("...........")
            for line in tail_deque:
                print(line, end='')
    except FileNotFoundError:
        typer.echo(f"Error: The file '{filename}' does not exist.", err=True)
        # Exit with a non-zero status so that shell callers can detect failure.
        raise typer.Exit(code=1)
    except Exception as e:
        typer.echo(f"An error occurred: {e}", err=True)
        raise typer.Exit(code=1)

# Start the Typer application when this file is executed as a script.
if __name__ == "__main__":
    app()
[Image: LFeJDO.png]
Compare to see that it works the same.
Output:
G:\div_code\reader_env λ python head_tail_grib.py -n 2 -m 7 contry.txt ==> (head) contry.txt <== Tokyo;35.6897 Jakarta;-6.1750 ==> (tail) contry.txt <== Nanyang;32.9987 Hangzhou;30.2500 Foshan;23.0292 Nagoya;35.1833 Taipei;25.0375 Tongshan;34.2610 Dhanbād;23.7998 G:\div_code\reader_env λ python head_tail.py contry.txt --head 2 --tail 7 Tokyo;35.6897 Jakarta;-6.1750 ............ Nanyang;32.9987 Hangzhou;30.2500 Foshan;23.0292 Nagoya;35.1833 Taipei;25.0375 Tongshan;34.2610 Dhanbād;23.7998 G:\div_code\reader_env
Update: I had two "with open" blocks; only one is needed.
Reply


Possibly Related Threads…
Thread Author Replies Views Last Post
  the posix "cut" command Skaperen 0 1,850 Oct-28-2018, 08:29 PM
Last Post: Skaperen
  program wanted in python Skaperen 2 2,742 Aug-07-2018, 12:05 AM
Last Post: Skaperen
  program wanted: diff that ignores numbers Skaperen 0 1,945 Jun-16-2018, 02:05 AM
Last Post: Skaperen
  program wanted: clean up pyc files Skaperen 6 5,771 Jun-13-2017, 05:42 PM
Last Post: snippsat
  anyone interested in a project of making python versions of various POSIX commands Skaperen 10 10,529 Oct-17-2016, 08:36 AM
Last Post: wavic

Forum Jump:

User Panel Messages

Announcements
Announcement #1 8/1/2020
Announcement #2 8/2/2020
Announcement #3 8/6/2020