Practical use of pathlib - Printable Version

Practical use of pathlib - Printable Version

+- Python Forum (https://python-forum.io)
+-- Forum: General (https://python-forum.io/forum-1.html)
+--- Forum: Code sharing (https://python-forum.io/forum-5.html)
+--- Thread: Practical use of pathlib (/thread-5699.html)

Practical use of pathlib - Larz60+ - Oct-17-2017

The app below uses pathlib objects to read the header record from each of the national FIPS code
files and print it. Eventually it will process each file into an easy-access format, but as a
proof of concept, I am only printing the first line of each.

module ReadFipsCodeData.py:

from pathlib import Path, PurePath
import json


class ReadFipsCodeData:
    """Print the header record of each national FIPS code file.

    The list of files, with the delimiter each one uses, is loaded from
    ``data/code_descr.json``; FIPS class descriptions are loaded from
    ``data/fips_class.json``. All paths are relative to the current
    working directory.
    """

    # 'delim' values that mean "split on runs of whitespace" instead of
    # naming a literal separator. str.split('spaces') would look for the
    # literal text "spaces" and str.split('') raises ValueError.
    _WHITESPACE_DELIMS = ('', 'spaces')

    def __init__(self):
        """Set up the data paths and load both JSON description files.

        Raises:
            FileNotFoundError: if either JSON file does not exist.
        """
        self.fips_dirs = [ ]

        self.homepath = Path('.')

        self.datapath = self.homepath / 'data'
        self.code_descr_filepath = self.datapath / 'code_descr.json'
        self.fips_class_filepath = self.datapath / 'fips_class.json'

        self.code_datapath = self.homepath / '..' / 'data' / 'Codes' / 'ANSI-FIPS-Codes' / 'AllFiles'

        with self.code_descr_filepath.open('r') as jn:
            self.code_categories = json.load(jn)

        with self.fips_class_filepath.open('r') as jn:
            self.fips_class = json.load(jn)

    @classmethod
    def _split_record(cls, record, delimiter):
        """Split one stripped line into fields using the configured delimiter."""
        if delimiter in cls._WHITESPACE_DELIMS:
            # No argument: split on any run of whitespace.
            return record.split()
        return record.split(delimiter)

    def build_national_code_db(self):
        """Print the file name and first non-blank line of every code file.

        For now only the first non-blank line (expected to be the heading)
        is read. It can be matched against the file's entry in
        ``self.code_categories`` to get a description of the file and of
        each field.

        Raises:
            FileNotFoundError: if a listed file is missing from
                ``self.code_datapath``.
        """
        for value in self.code_categories.values():
            f = value['filename']
            filepath = self.code_datapath / f

            delimiter = value['delim']

            with filepath.open('r', encoding='ISO-8859-1') as fin:
                # Iterate lazily: only the header is needed, so stop as
                # soon as it has been printed instead of loading the file.
                for raw_line in fin:
                    stripped = raw_line.strip()
                    if not stripped:
                        continue
                    header = self._split_record(stripped, delimiter)
                    print(f'\nfilename: {f}')
                    print(f'Header: {header}')
                    break


def main():
    """Create a ReadFipsCodeData reader and run build_national_code_db() on it."""
    ReadFipsCodeData().build_national_code_db()

# Run main() only when this file is executed as a script, not when it is imported.
if __name__ == '__main__':
    main()

This produces the following output:

Output:
filename: national_aia.txt
Header: ['AIANNHCE', 'AIANNHNAME']

filename: national_cd113.txt
Header: ['STATE   STATEFP CD113FP NAMELSAD']

filename: national_county.txt
Header: ['AL', '01', '001', 'Autauga County', 'H1']

filename: national_cousub.txt
Header: ['STATE', 'STATEFP', 'COUNTYFP', 'COUNTYNAME', 'COUSUBFP', 'COUSUBNAME', 'FUNCSTAT']

filename: national_places.txt
Header: ['STATE', 'STATEFP', 'PLACEFP', 'PLACENAME', 'TYPE', 'FUNCSTAT', 'COUNTY']

filename: national_schdist.txt
Header: ['STATE', 'STATEFP', 'LEA', 'SDNAME', 'TYPE']

filename: national_vtd.txt
Header: ['STATE', 'STATEFP', 'COUNTYFP', 'COUNTYNAME', 'VTDST', 'VTDNAME']

The two dictionaries that are used to get file information are created with the following application:
CreateFIPScodeDescJson.py:

from pathlib import Path, PurePath
import json


class CreateFIPScodeDescJson:
    """Write the FIPS class and code-file description dictionaries as JSON.

    Instantiating the class builds both dictionaries and saves them to
    ``data/fips_class.json`` and ``data/code_descr.json`` (relative to the
    current working directory). The dictionaries stay available afterwards
    as ``self.fips_class`` and ``self.code_categories``.
    """

    def __init__(self):
        """Build both dictionaries and save them under the ``data`` directory.

        Raises:
            OSError: if the ``data`` directory or either file cannot be written.
        """
        self.homepath = Path('.')
        self.datapath = self.homepath / 'data'
        self.code_descr_filepath = self.datapath / 'code_descr.json'
        self.fips_class_filepath = self.datapath / 'fips_class.json'

        # FIPS class code -> description of what that class identifies.
        self.fips_class = {
            'H1': 'identifies an active county or statistically equivalent entity that does not qualify'
                  ' under subclass C7 or H6.',
            'H4': 'identifies a legally defined inactive or nonfunctioning county or statistically '
                  'equivalent entity that does not qualify under subclass H6.',
            'H5': 'identifies census areas in Alaska, a statistical county equivalent entity.',
            'H6': 'identifies a county or statistically equivalent entity that is areally coextensive '
                  'or governmentally consolidated with an incorporated place, part of an incorporated '
                  'place, or a consolidated city.',
            'C7': 'identifies an incorporated place that is an independent city; that is, it also '
                  'serves as a county equivalent because it is not part of any county, and a minor '
                  'civil division (MCD) equivalent because it is not part of any MCD.'
        }

        # One entry per data file: its delimiter, its file name and, under
        # 'fields', one entry per column keyed by the column's header name.
        # The 'fields' keys must match the header row of the data file
        # exactly, because they are looked up by header name.
        self.code_categories = {
            'national_aia.txt': {
                'delim': ',',
                'filename': 'national_aia.txt',
                'fields': {
                    'AIANNHCE': {
                        'length': '4',
                        'type': 'String',
                        'descr': 'Current American Indian/Alaska Native/Native Hawaiian area census code'
                    },
                    'AIANNHNAME': {
                        'length': '',
                        'type': 'String',
                        'descr': 'American Indian Area name and legal/statistical area description'
                    }
                },
            },
            'national_cd113.txt': {
                # 'spaces' is a sentinel, not a literal separator: the
                # columns are separated by runs of spaces, so a reader has
                # to map this value to str.split() with no argument.
                'delim': 'spaces',
                'filename': 'national_cd113.txt',
                'fields': {
                    'STATE': {
                        'length': '2',
                        'type': 'String',
                        'descr': 'State Postal Code'
                    },
                    'STATEFP': {
                        'length': '2',
                        'type': 'String',
                        'descr': 'State FIPS CODE'
                    },
                    'CD113FP': {
                        'length': '4',
                        'type': 'String',
                        'descr': '113th Congressional District FIPS Code'
                    },
                    'NAMELSAD': {
                        'length': '41',
                        'type': 'String',
                        'descr': 'Current name and the translated legal/statistical area description'
                                 ' for the congressional district'
                    }
                },
            },
            'national_county.txt': {
                'delim': ',',
                'filename': 'national_county.txt',
                'fields': {
                    'STATE': {
                        'length': '2',
                        'type': 'String',
                        'descr': 'State Postal Code'
                    },
                    'STATEFP': {
                        'length': '2',
                        'type': 'String',
                        'descr': 'State FIPS Code'
                    },
                    'COUNTYFP': {
                        'length': '3',
                        'type': 'String',
                        'descr': 'County FIPS Code'
                    },
                    'COUNTYNAME': {
                        'length': '100',
                        'type': 'String',
                        'descr': 'County Name and Legal/Statistical Area Description'
                    },
                    'CLASSFP': {
                        'length': '2',
                        'type': 'String',
                        'descr': 'FIPS Class Code'
                    }
                },
            },
            'national_cousub.txt': {
                'delim': ',',
                'filename': 'national_cousub.txt',
                'fields': {
                    'STATE': {
                        'length': '2',
                        'type': 'String',
                        'descr': 'State Postal Code'
                    },
                    'STATEFP': {
                        'length': '2',
                        'type': 'String',
                        'descr': 'Current state FIPS code'
                    },
                    'COUNTYFP': {
                        'length': '3',
                        'type': 'String',
                        'descr': 'Current county FIPS code'
                    },
                    # Keyed by the header names used in the data file
                    # (COUNTYNAME / COUSUBNAME) so header lookups succeed.
                    # NOTE(review): this 'descr' reads like a subdivision
                    # description rather than a county one - confirm
                    # against the Census file layout.
                    'COUNTYNAME': {
                        'length': '100',
                        'type': 'String',
                        'descr': 'Current county subdivision name'
                    },
                    'COUSUBFP': {
                        'length': '5',
                        'type': 'String',
                        'descr': 'Current county subdivision FIPS code'
                    },
                    'COUSUBNAME': {
                        'length': '100',
                        'type': 'String',
                        'descr': 'Current name and the translated legal/statistical area description code '
                                 'for county subdivision'
                    },
                    'FUNCSTAT': {
                        'length': '1',
                        'type': 'String',
                        'descr': 'Current functional status'
                    },
                }
            },
            'national_places.txt': {
                'delim': '|',
                'filename': 'national_places.txt',
                'fields': {
                    'STATE': {
                        'length': '2',
                        'type': 'String',
                        'descr': 'State Postal Code'
                    },
                    'STATEFP': {
                        'length': '2',
                        'type': 'String',
                        'descr': 'Current state FIPS code'
                    },
                    'PLACEFP': {
                        'length': '5',
                        'type': 'String',
                        'descr': 'Current place FIPS code'
                    },
                    'PLACENAME': {
                        'length': '100',
                        'type': 'String',
                        'descr': 'Current place name'
                    },
                    'TYPE': {
                        'length': '100',
                        'type': 'String',
                        'descr': 'Place Name Type'
                    },
                    'FUNCSTAT': {
                        'length': '1',
                        'type': 'String',
                        'descr': 'Current functional status'
                    },
                    'COUNTY': {
                        'length': '100',
                        'type': 'String',
                        'descr': 'Current County Name'
                    }
                }
            },
            'national_schdist.txt': {
                'delim': ',',
                'filename': 'national_schdist.txt',
                'fields': {
                    'STATE': {
                        'length': '2',
                        'type': 'String',
                        'descr': 'State Postal Code'
                    },
                    'STATEFP': {
                        'length': '2',
                        'type': 'String',
                        'descr': 'Current state FIPS code'
                    },
                    'LEA': {
                        'length': '5',
                        'type': 'String',
                        'descr': 'Federal Local Education Agency ID'
                    },
                    'SDNAME': {
                        'length': '100',
                        'type': 'String',
                        'descr': 'Current unified school district name'
                    },
                    'TYPE': {
                        'length': '100',
                        'type': 'String',
                        'descr': 'Current school district type'
                    }
                },
            },
            'national_vtd.txt': {
                'delim': '|',
                'filename': 'national_vtd.txt',
                'fields': {
                    'STATE': {
                        'length': '2',
                        'type': 'String',
                        'descr': 'State Postal Code'
                    },
                    'STATEFP': {
                        'length': '2',
                        'type': 'String',
                        'descr': 'Current state FIPS code'
                    },
                    'COUNTYFP': {
                        'length': '3',
                        'type': 'String',
                        'descr': 'County FIPS Code'
                    },
                    'COUNTYNAME': {
                        'length': '100',
                        'type': 'String',
                        'descr': 'County Name and Legal/Statistical Area Description'
                    },
                    'VTDST':{
                        'length': '',
                        'type': '',
                        'descr': 'Voter District'
                    },
                    'VTDNAME':{
                        'length': '',
                        'type': '',
                        'descr': 'Voter District Name'
                    }
                },
            }
        }

        # Make sure the output directory exists so a first run in a fresh
        # working directory does not fail when the files are opened.
        self.datapath.mkdir(parents=True, exist_ok=True)

        # save code class dict
        with self.fips_class_filepath.open('w') as jout:
            json.dump(self.fips_class, jout)

        # Save code description dict
        with self.code_descr_filepath.open('w') as jout:
            json.dump(self.code_categories, jout)

# Constructing the class is what writes both JSON files, so no further call is needed.
if __name__ == '__main__':
    CreateFIPScodeDescJson()

This type of file definition allows for file content verification.
For example, here is a new version of ReadFipsCodeData.py with verification:

from pathlib import Path, PurePath
import json


class ReadFipsCodeData:
    """Read the header of each national FIPS code file and verify it.

    The list of files, the delimiter each one uses and the expected fields
    are loaded from ``data/code_descr.json``; FIPS class descriptions are
    loaded from ``data/fips_class.json``. All paths are relative to the
    current working directory.
    """

    # 'delim' values that mean "split on runs of whitespace" instead of
    # naming a literal separator. str.split('spaces') would look for the
    # literal text "spaces" and str.split('') raises ValueError.
    _WHITESPACE_DELIMS = ('', 'spaces')

    def __init__(self):
        """Set up the data paths and load both JSON description files.

        Raises:
            FileNotFoundError: if either JSON file does not exist.
        """
        self.fips_dirs = [ ]

        self.homepath = Path('.')

        self.datapath = self.homepath / 'data'
        self.code_descr_filepath = self.datapath / 'code_descr.json'
        self.fips_class_filepath = self.datapath / 'fips_class.json'

        self.code_datapath = self.homepath / '..' / 'data' / 'Codes' / 'ANSI-FIPS-Codes' / 'AllFiles'

        with self.code_descr_filepath.open('r') as jn:
            self.code_categories = json.load(jn)

        with self.fips_class_filepath.open('r') as jn:
            self.fips_class = json.load(jn)

    @classmethod
    def _split_record(cls, record, delimiter):
        """Split one stripped line into fields using the configured delimiter."""
        if delimiter in cls._WHITESPACE_DELIMS:
            # No argument: split on any run of whitespace.
            return record.split()
        return record.split(delimiter)

    def verify_file_content(self, header, filename, file_format):
        """Print each header field's definition, checking that it is known.

        Args:
            header: list of field names taken from the file's header line.
            filename: name printed above the header.
            file_format: the file's description dict; its ``'fields'``
                entry maps field name to ``'length'``, ``'type'`` and
                ``'descr'``.

        Returns:
            True when every field in ``header`` has a definition.

        Raises:
            FileNotFoundError: when a header field (or one of its
                attributes) is missing from ``file_format``. This exception
                type is kept so existing callers that catch it still work;
                the underlying lookup error is chained as the cause.
        """
        print(f'\n{filename}')
        print(f'Header: {header}')
        try:
            for fieldname in header:
                field = file_format['fields'][fieldname]
                print(f'\n    fieldname: {fieldname}')
                print(f"    length: {field['length']}, type: {field['type']}")
                print(f"    {field['descr']}")
        except (KeyError, TypeError) as err:
            # Only lookup failures mean "this is not the expected file";
            # anything else (e.g. KeyboardInterrupt) must propagate as is.
            raise FileNotFoundError(
                f'{filename}: header {header} does not match the expected '
                f'field definitions ({err!r})'
            ) from err
        return True

    def build_national_code_db(self):
        """Verify the header line of every configured code file.

        Only the first non-blank line of each file (expected to be the
        heading) is read for now and matched against the file's entry in
        ``self.code_categories``. A missing file or a header that fails
        verification stops the scan and is reported on standard output
        instead of being raised.
        """
        try:
            for filename, value in self.code_categories.items():
                f = value['filename']
                filepath = self.code_datapath / f

                delimiter = value['delim']

                with filepath.open('r', encoding='ISO-8859-1') as fin:
                    # Iterate lazily: only the header is needed so far.
                    for raw_line in fin:
                        stripped = raw_line.strip()
                        if not stripped:
                            continue
                        fields = self._split_record(stripped, delimiter)
                        if self.verify_file_content(fields, filename, value):
                            # Header accepted; processing of the remaining
                            # lines of the file would start here.
                            break
        except FileNotFoundError as err:
            # Show the underlying reason first: a missing file and a
            # header mismatch both arrive here as FileNotFoundError.
            print(f'Error: {err}')
            print(f'The directory {self.code_datapath.resolve()} does not contain FIPS data files.')
            print('Please supply proper directory name')


def main():
    """Instantiate ReadFipsCodeData and call build_national_code_db() on it."""
    ReadFipsCodeData().build_national_code_db()

# Run main() only when this file is executed as a script, not when it is imported.
if __name__ == '__main__':
    main()

which produces:

Output:
national_aia.txt
Header: ['AIANNHCE', 'AIANNHNAME']

    fieldname: AIANNHCE
    length: 4, type: String
    Current American Indian/Alaska Native/Native Hawaiian area census code

    fieldname: AIANNHNAME
    length: , type: String
    American Indian Area name and legal/statistical area description

national_cd113.txt
Header: ['STATE', 'STATEFP', 'CD113FP', 'NAMELSAD']

    fieldname: STATE
    length: 2, type: String
    State Postal Code

    fieldname: STATEFP
    length: 2, type: String
    State FIPS CODE

    fieldname: CD113FP
    length: 4, type: String
    113th Congressional District FIPS Code

    fieldname: NAMELSAD
    length: 41, type: String
    Current name and the translated legal/statistical area description for the congressional district

national_county.txt
Header: ['STATE', 'STATEFP', 'COUNTYFP', 'COUNTYNAME', 'CLASSFP']

    fieldname: STATE
    length: 2, type: String
    State Postal Code

    fieldname: STATEFP
    length: 2, type: String
    State FIPS Code

    fieldname: COUNTYFP
    length: 3, type: String
    County FIPS Code

    fieldname: COUNTYNAME
    length: 100, type: String
    County Name and Legal/Statistical Area Description

    fieldname: CLASSFP
    length: 2, type: String
    FIPS Class Code

national_cousub.txt
Header: ['STATE', 'STATEFP', 'COUNTYFP', 'COUNTYNAME', 'COUSUBFP', 'COUSUBNAME', 'FUNCSTAT']

    fieldname: STATE
    length: 2, type: String
    State Postal Code

    fieldname: STATEFP
    length: 2, type: String
    Current state FIPS code

    fieldname: COUNTYFP
    length: 3, type: String
    Current county FIPS code

    fieldname: COUNTYNAME
    length: 100, type: String
    Current county subdivision name

    fieldname: COUSUBFP
    length: 5, type: String
    Current county subdivision FIPS code

    fieldname: COUSUBNAME
    length: 100, type: String
    Current name and the translated legal/statistical area description code for county subdivision

    fieldname: FUNCSTAT
    length: 1, type: String
    Current functional status

national_places.txt
Header: ['STATE', 'STATEFP', 'PLACEFP', 'PLACENAME', 'TYPE', 'FUNCSTAT', 'COUNTY']

    fieldname: STATE
    length: 2, type: String
    State Postal Code

    fieldname: STATEFP
    length: 2, type: String
    Current state FIPS code

    fieldname: PLACEFP
    length: 5, type: String
    Current place FIPS code

    fieldname: PLACENAME
    length: 100, type: String
    Current place name

    fieldname: TYPE
    length: 100, type: String
    Place Name Type

    fieldname: FUNCSTAT
    length: 1, type: String
    Current functional status

    fieldname: COUNTY
    length: 100, type: String
    Current County Name

national_schdist.txt
Header: ['STATE', 'STATEFP', 'LEA', 'SDNAME', 'TYPE']

    fieldname: STATE
    length: 2, type: String
    State Postal Code

    fieldname: STATEFP
    length: 2, type: String
    Current state FIPS code

    fieldname: LEA
    length: 5, type: String
    Federal Local Education Agency ID

    fieldname: SDNAME
    length: 100, type: String
    Current unified school district name

    fieldname: TYPE
    length: 100, type: String
    Current school district type

national_vtd.txt
Header: ['STATE', 'STATEFP', 'COUNTYFP', 'COUNTYNAME', 'VTDST', 'VTDNAME']

    fieldname: STATE
    length: 2, type: String
    State Postal Code

    fieldname: STATEFP
    length: 2, type: String
    Current state FIPS code

    fieldname: COUNTYFP
    length: 3, type: String
    County FIPS Code

    fieldname: COUNTYNAME
    length: 100, type: String
    County Name and Legal/Statistical Area Description

    fieldname: VTDST
    length: , type:
    Voter District

    fieldname: VTDNAME
    length: , type:
    Voter District Name