#!/usr/bin/perl
#
# jsonfmt       This script pretty-prints a collapsed set of text data
#               into an easy-to-read, expanded version of the data.
#               The text data is expected to be in a JSON-y format.
#
#               This is a reworked version of WMorrison's datafmt script,
#               which was written in 2019.
#
#       usage:
#               jsonfmt [-... | -help | -Version]
#
# Revision History
#       1.0     Initial revision.                               200216
#       1.1     Added the -indent option.                       200322
#
#       Copyright 2020 Wayne Morrison.
#       Written by Wayne Morrison, 200216.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#       http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

use strict;

use Getopt::Long qw(:config no_ignore_case_always);

#
# Version information.
#
my $NAME = "jsonfmt";
my $VERS = "$NAME version: 1.1";

############################################################################
#
# Options fields.
#

my %opts = ();                  # Parsed option values, filled by GetOptions().

#
# Command line arguments (Getopt::Long specifications).
#
my @opts =
(
    'depth=n',                  # Only show output to certain depth.
    'indent=n',                 # Set size of each indentation level.
    'noblanks',                 # Don't include blank lines.
    'showinput',                # Show input.
    'struct',                   # Show structure of formatted data.

    'verbose',                  # Give verbose output.
    'help',                     # Give a help message.
    'Version',                  # Display the program version.
);

my $depth     = -1;             # Show-depth flag; -1 means "show everything".
my $noblanks  = 0;              # No-blank-lines flag.
my $showinput = 0;              # Show-input flag.
my $struct    = 0;              # Show-structure flag.
my $verbose   = 0;              # Verbose flag.

#
# Indent string used for one indentation level.  The manual page documents
# the default as a single tab per level; -indent replaces it with a run of
# spaces.  It is written as an escape so that whitespace normalization of
# this file cannot silently turn it into a space.
#
my $indent    = "\t";

my $infile    = '';             # Input filename; empty means standard input.

my @structure = ();             # Indentation level of each output line.
############################################################################

main();
exit(0);

#-----------------------------------------------------------------------------
# Routine:      main()
#
# Purpose:      Drive the program:  parse the command line, read the input,
#               format it, and print the result.
#
sub main
{
    my $longline;               # Joined-up lines of input.
    my @data;                   # Formatted data to print.

    $| = 1;                     # Unbuffered output.

    #
    # Munch on the options and arguments.
    #
    optsandargs();

    #
    # Get the input to format.
    #
    $longline = getlines();

    #
    # Format the input to a nice pretty representation.
    #
    @data = formatter($longline);

    #
    # Print the formatted data.
    #
    printer(@data);
}

#----------------------------------------------------------------------
# Routine:      optsandargs()
#
# Purpose:      Parse the command line for options and arguments.
#               The file-level option variables are set from the parsed
#               options.  The first remaining argument, if any, is taken
#               as the input file.
#
sub optsandargs
{
    #
    # Parse the options.
    #
    GetOptions(\%opts, @opts) || usage();

    #
    # Check for some immediate-action options.  Neither call returns.
    #
    usage()   if(defined($opts{'help'}));
    version() if(defined($opts{'Version'}));

    $verbose   = $opts{'verbose'};
    $noblanks  = $opts{'noblanks'};
    $showinput = $opts{'showinput'};
    $struct    = $opts{'struct'};

    #
    # These two only override the defaults when they were actually given.
    #
    $depth  = $opts{'depth'}        if(defined($opts{'depth'}));
    $indent = " " x $opts{'indent'} if(defined($opts{'indent'}));

    #
    # If there's an argument left, we'll assume it's an input file.
    # (This is a scalar element, not the one-element slice @ARGV[0].)
    #
    if(@ARGV > 0)
    {
        $infile = $ARGV[0];
    }
}

#----------------------------------------------------------------------
# Routine:      getlines()
#
# Purpose:      This routine gets the input JSON, whether from a file or
#               from standard input.  It's all joined into a single string.
#
# Returns:      The complete input as one string.  The program exits with
#               status 1 if the input file can't be opened.
#
sub getlines
{
    my @lines;                  # Lines of input.
    my $longline;               # Joined-up lines of input.

    #
    # Get the input.  We'll take it either from a file or from stdin.
    # The three-argument open keeps odd characters in the filename from
    # being interpreted as an open mode.
    #
    if($infile ne '')
    {
        my $fh;                 # Input file handle.

        if(! open($fh, '<', $infile))
        {
            print STDERR "jsonfmt:  unable to open \"$infile\":  $!\n";
            exit(1);
        }

        @lines = <$fh>;
        close($fh);
    }
    else
    {
        @lines = <STDIN>;
    }

    #
    # Make one big long line from the input we read.
    #
    $longline = join '', @lines;

    #
    # Echo the raw input if -showinput was given.
    #
    if($showinput)
    {
        print "\n$longline\n------------------\n";
    }

    return($longline);
}

#----------------------------------------------------------------------
# Routine:      formatter()
#
# Purpose:      This routine divides a string of JSON text into separate
#               lines, formatting each one as it goes.  The formatted text is
#               put into a list, which is returned to the caller.  The
#               indentation level of each line is recorded in the file-level
#               @structure list.
#
#               The formatting actions are:
#                       - put each block marker (left and right squigglies
#                         and square brackets) on its own line
#                       - indent lines to show the block membership
#                       - put each line of data on its own line
#                       - change each "key:value" line to be spaced according
#                         to the longest key name in each indentation level.
#                         This is done by indentation level throughout the
#                         whole data, not just within a block.
#                         Only keys ending in a single quote ( ': ) are
#                         recognized as keys.
#                       - a blank line is added after a block's end marker
#                       - empty lines are removed iff -noblanks was given
#
#               Single quotes and double quotes are assumed to be paired.
#               A single-quoted string may contain any number of double quotes.
#               A double-quoted string may contain any number of single quotes.
#               Characters may be escaped with '\'.
#
#               Whitespace outside of quoted strings (spaces, tabs, newlines)
#               is never significant:  it is dropped at the start of an output
#               line and otherwise kept as a single space per character.
#
# Arguments:    The input text, as a single string.
#
# Returns:      The list of formatted output lines.
#
sub formatter
{
    my $longline = shift;       # Joined-up lines of input.

    my @lines;                  # Formatted output lines.
    my $lineind = 0;            # Line index for processed lines.
    my $level = 0;              # Indentation level.

    my $ch = '';                # Current character to check.
    my $lastch = '';            # Previous significant character checked.

    my $squoting = 0;           # Flag for inside single quotes.
    my $dquoting = 0;           # Flag for inside double quotes.
    my $escaping = 0;           # Flag for escape character given.

    my @maxlevels = ();         # Maximum length of keys in each level.

    #
    # Dump the whitespace from both ends of the input.
    #
    $longline = '' if(!defined($longline));
    $longline =~ s/^\s+//;
    $longline =~ s/\s+$//;

    #
    # Go through the long line character by character and build the
    # formatted output array.
    #
    for(my $ind = 0; $ind < length($longline); $ind++)
    {
        #
        # Save the previous character and get the next character
        # from the line.
        #
        $lastch = $ch;
        $ch = substr($longline, $ind, 1);

        #
        # If the previous character was an escape character, we'll
        # add this one to the line buffer and turn off escaping.
        #
        if($escaping)
        {
            $lines[$lineind] .= $ch;
            $escaping = 0;
            next;
        }

        #
        # If we're in the middle of a quoted string, we'll g'head and
        # add this character to the line.  If we hit the end of the
        # string (find a matching quote/dquote), we'll turn off the
        # appropriate quoting flag.  We'll turn on escaping if this
        # character is a backslash.
        #
        if($squoting)
        {
            $lines[$lineind] .= $ch;

            $squoting = 0 if($ch eq "'");
            $escaping = 1 if($ch eq "\\");
            next;
        }
        elsif($dquoting)
        {
            $lines[$lineind] .= $ch;

            $dquoting = 0 if($ch eq '"');
            $escaping = 1 if($ch eq "\\");
            next;
        }

        #
        # From here on we know we're outside of any quoted string.
        #

        if(($ch eq "'") || ($ch eq '"'))
        {
            #
            # This quote starts a quoted string.
            #
            if($ch eq "'")
            {
                $squoting = 1;
            }
            else
            {
                $dquoting = 1;
            }

            #
            # Add this quote to the current line.
            # Indent the line if this is the start of the line.
            #
            if(!defined($lines[$lineind]) || ($lines[$lineind] eq ''))
            {
                $lines[$lineind] = ($indent x $level);
            }
            $lines[$lineind] .= $ch;
            $structure[$lineind] = $level;
        }

        #
        # If this is a left bracket (squiggly or square), we'll move
        # it to the next line and indent it to the proper level.
        # We'll also increase the indentation level and start a
        # new line.
        #
        elsif($ch =~ /[{\[]/)
        {
            #
            # The first line is a special case.  We'll put the
            # bracket in the first output-lines slot and set all
            # the indices appropriately.  On to the next
            # character after that.
            # This is only done if nothing has been put on the
            # first line yet; otherwise that text would be lost.
            #
            if(($lineind == 0)  &&
               (!defined($lines[0]) || ($lines[0] eq '')))
            {
                $lines[0] = $ch;
                $structure[0] = 0;

                $lineind = 1;
                $level = 1;
                next;
            }

            #
            # Increment the line count if the previous character
            # wasn't a left bracket.
            #
            if($lastch !~ /[{\[]/)
            {
                $lineind++;
            }

            #
            # Put the indented bracket on the next line.
            #
            $lines[$lineind] = ($indent x $level) . $ch;
            $structure[$lineind] = $level;

            $lineind++;
            $level++;
        }

        #
        # If this is a right squiggly or right bracket, we'll move
        # it to the next line and indent it to the proper level.
        # We'll also decrease the indentation level and start a
        # new line with the appropriate level of indentation.
        #
        elsif($ch =~ /[}\]]/)
        {
            #
            # Put the squiggly or bracket on the next line, unless
            # the current line hasn't been used yet.
            #
            if(defined($lines[$lineind]) && ($lines[$lineind] !~ /^\s*$/))
            {
                $lineind++;
            }

            $level--;
            $lines[$lineind] = ($indent x $level) . $ch;
            $structure[$lineind] = $level;

            #
            # Add the next line's indentation.
            #
            $lineind++;
            $lines[$lineind] = ($indent x $level);
            $structure[$lineind] = $level;
        }

        #
        # If this is a comma, we'll either put it at the end of this
        # line or the previous line.  It only goes at the end of the
        # previous line if that line ended with a right squiggly or a
        # right bracket and nothing has been put on this line yet.
        # Otherwise, it goes at the end of this line.
        # We'll also start a new line with the appropriate level of
        # indentation.
        #
        elsif($ch eq ',')
        {
            my $lastline = $lineind - 1;        # Previous line's index.
            my $curblank;                       # Current-line-unused flag.

            $curblank = (!defined($lines[$lineind])  ||
                         ($lines[$lineind] =~ /^\s*$/));

            if(($lastline >= 0) && $curblank  &&
               defined($lines[$lastline])     &&
               ($lines[$lastline] =~ /[}\]]$/))
            {
                $lines[$lastline] .= ",";
            }
            else
            {
                $lines[$lineind] .= ",";
            }

            #
            # Add the next line's indentation.
            #
            # XXX  Do we *always* want to start a new
            #      line after an unquoted, unescaped comma?
            #
            $lineind++;
            $lines[$lineind] = ($indent x $level);
            $structure[$lineind] = $level;
        }

        #
        # Whitespace outside of a quoted string.  It's dropped if
        # nothing but indentation is on the current line yet;
        # otherwise it's added to the line as a single space.
        #
        elsif($ch =~ /\s/)
        {
            if(!defined($lines[$lineind]) || ($lines[$lineind] =~ /^\s*$/))
            {
                #
                # Dropped whitespace must not count as the
                # "previous character", so that "{ [" is handled
                # exactly the same as "{[".
                #
                $ch = $lastch;
                next;
            }

            $lines[$lineind] .= ' ';
        }

        #
        # All other characters are added to the end of the current
        # line, which is indented first if it's still empty.
        #
        else
        {
            #
            # Turn on escaping if this is an escape character.
            #
            if($ch eq "\\")
            {
                $escaping = 1;
            }

            if(!defined($lines[$lineind]) || ($lines[$lineind] eq ''))
            {
                $lines[$lineind] = ($indent x $level);
                $structure[$lineind] = $level;
            }

            $lines[$lineind] .= $ch;
        }
    }

    #
    # Blank out any lines that are unused or just whitespace; the lines
    # are retained, just made to be empty.
    # Trailing whitespace is removed from each line.
    #
    foreach my $ln (@lines)
    {
        $ln = '' if(!defined($ln));
        $ln =~ s/\s+$//;
    }

    #---------------------------------------------------------------
    #
    # The following two loops give vertical alignment to the value
    # part of key/value lines.
    # The first loop finds the longest key field for each level.  This
    # is across all blocks, not just within a block.
    # The second loop reformats the key/value lines so the values will
    # all line up nicely.
    #
    # In both loops, a key line that is indented N levels deep uses
    # slot N-1 of @maxlevels.
    #

    #
    # Get the maximum key lengths for each indentation level.
    #
    $level = 0;
    foreach my $ln (@lines)
    {
        #
        # Get the indentation level from the number of indent
        # strings in front of a block start.
        #
        if($ln =~ /^(\s+)[\[{]$/)
        {
            my $tmpstr = $1;

            $tmpstr =~ s/\Q$indent\E/W/g;
            $level = length($tmpstr);
            next;
        }

        #
        # Decrement the level on block close.  The level is never
        # allowed to go negative, which could only happen with
        # unbalanced brackets.
        #
        if($ln =~ /^(\s+)[\]}],?$/)
        {
            $level-- if($level > 0);
            next;
        }

        #
        # Skip lines without key/value data.
        #
        next if($ln !~ /':/);

        #
        # Get the key from the line, without its closing quote.
        #
        my $lncp = $ln;                         # Copy of line.
        $lncp =~ s/^\s+(.+?)':.*$/$1/;

        #
        # Save this key's length if it's greater than the longest
        # seen so far.
        #
        if(!defined($maxlevels[$level])  ||
           (length($lncp) > $maxlevels[$level]))
        {
            $maxlevels[$level] = length($lncp);
        }
    }

    #
    # Reformat the key/value lines so the values line up nicely when
    # looked at vertically.  This makes things easier to read.
    #
    for(my $ind = 0; $ind < @lines; $ind++)
    {
        my $ln = $lines[$ind];                  # Current line.

        my $keylevel;                           # Indentation level.
        my $fore;                               # Pre-key stuff.
        my $key;                                # Line's key.
        my $aft;                                # Post-key stuff.
        my $klen;                               # Length of key.

        #
        # Skip lines that are either a block opener or a block closer.
        #
        next if($ln =~ /^(\s+)[\[{]$/);
        next if($ln =~ /^(\s+)[\]}],?$/);

        #
        # Skip lines that don't appear to be key/values.
        # (This is perhaps not the greatest check in the world,
        # but it'll do for now.)
        #
        next if($ln !~ /':/);

        #
        # Get the pieces of the line.  Lines that don't have the
        # expected shape (e.g., unindented lines) are left alone,
        # rather than being rebuilt from stale capture values.
        #
        next if($ln !~ /^(\s+)(.+?')(:.*)$/);
        $fore = $1;
        $key  = $2;
        $aft  = $3;

        #
        # Calculate the level from the indentation at the start of
        # the line.
        #
        if($ln =~ /^(\s+)/)
        {
            my $tmpstr = $1;

            $tmpstr =~ s/\Q$indent\E/W/g;
            $keylevel = length($tmpstr) - 1;
        }

        #
        # If there isn't a space after the initial colon, we'll
        # add it on in.
        #
        if($aft !~ /^: /)
        {
            $aft =~ s/^:/: /;
        }

        #
        # Get the maximum number of characters the key chunk should
        # use.  The extra character is for the key's closing quote.
        #
        $klen = (defined($maxlevels[$keylevel]) ? $maxlevels[$keylevel] : 0) + 1;

        #
        # Format the line so the keys all line up vertically.
        #
        $lines[$ind] = sprintf("%s%-*s%s", $fore, $klen, $key, $aft);

        #
        # Remove any trailing blanks.
        #
        $lines[$ind] =~ s/\s+$//;
    }

    #---------------------------------------------------------------
    #
    # Delete blank lines iff -noblanks was given.
    #
    if($noblanks)
    {
        #
        # Go on a search-and-destroy mission for empty lines.
        # This runs backwards through the lines list, starting at
        # the last valid index, so deletions don't disturb the
        # indices that are still to be checked.
        #
        for(my $ind = $#lines; $ind >= 0; $ind--)
        {
            next if($lines[$ind] ne '');

            splice @lines, $ind, 1;
            splice @structure, $ind, 1 if($ind < @structure);
        }

        #
        # Sometimes the final line ends up without a newline,
        # so we'll force one on.  (There's nothing to do if there
        # aren't any lines at all.)
        #
        if((@lines > 0) && ($lines[-1] !~ /\n/))
        {
            $lines[-1] .= "\n";
        }
    }

    #---------------------------------------------------------------
    #
    # Return the cooked output to our caller.
    #
    return(@lines);
}

#----------------------------------------------------------------------
# Routine:      printer()
#
# Purpose:      Print the formatted data.  The lines are filtered by
#               -depth, if it was given.  If -struct was given, each
#               non-blank line is prefixed with its level and a tab.
#
# Arguments:    The list of formatted lines, as returned by formatter().
#
sub printer
{
    my @lines = @_;             # Formatted data to print.

    my @finals = ();            # Final set of formatted lines.
    my @fstruct = ();           # Final set of struct values.

    #
    # Copy the formatted output to the list of final lines.  If the
    # -depth option was given, then only the lines to the specified
    # depth will be copied.
    # The level structure list is also copied, such that the final
    # structure matches the final output lines.
    #
    for(my $ind = 0; $ind < @lines; $ind++)
    {
        my $lnlvl = $structure[$ind];           # Current line's level.

        #
        # Skip lines that are deeper than -depth allows.
        #
        next if(($depth != -1) && ($lnlvl > $depth));

        push @finals, $lines[$ind];
        push @fstruct, $lnlvl;
    }

    if($struct)
    {
        #
        # Print the structure of the data.
        # The line is data, not a format, so it must be print'd
        # and not printf'd; a '%' in the data would be mangled.
        #
        for(my $ind = 0; $ind < @fstruct; $ind++)
        {
            my $ln = $finals[$ind];
            my $lev = '';

            if($ln ne '')
            {
                $lev = "$fstruct[$ind]\t";
            }

            print "$lev$ln\n";
        }
    }
    else
    {
        #
        # Print the plain data.
        #
        foreach my $ln (@finals)
        {
            print "$ln\n";
        }
    }
}

#----------------------------------------------------------------------
# Routine:      version()
#
# Purpose:      Print the version number(s) and exit.
#
sub version
{
    print STDERR "$VERS\n";
    exit(0);
}

#----------------------------------------------------------------------
# Routine:      usage()
#
# Purpose:      Give usage message and exit.
#
#               The message is written to standard error.  The option
#               names listed here must match the Getopt::Long option
#               specifications used to parse the command line.
#
sub usage
{
    print STDERR "usage:  jsonfmt [options] [file]\n";
    print STDERR "\n";
    print STDERR "\twhere [options] are:\n";
    print STDERR "\t\t-depth level\n";
    print STDERR "\t\t-indent length\n";
    print STDERR "\t\t-noblanks\n";
    print STDERR "\t\t-showinput\n";
    print STDERR "\t\t-struct\n";
    print STDERR "\n";
    print STDERR "\t\t-help\n";
    print STDERR "\t\t-verbose\n";
    print STDERR "\t\t-Version\n";

    exit(0);
}

1;

##############################################################################

=pod

=head1 NAME

B<jsonfmt> - format unformatted JSON data

=head1 SYNOPSIS

  jsonfmt [options] [file]

=head1 DESCRIPTION

B<jsonfmt> formats a collapsed set of text data into an easy-to-read,
expanded version of the data.  The data is assumed to be JSON-like, and
with the whole thing on a single line.  In particular, B<jsonfmt> was
written to handle JSON output returned from ElasticSearch queries.

JSON files are B<not> checked for validity and proper formatting.
B<jsonfmt> assumes the input is correct and parses it accordingly.
If invalid JSON is given (e.g., improper quoting or unbalanced brackets),
then B<jsonfmt> may report a problem, but it may also be able to parse the
input as valid.

=for comment NOTE(review): the program names in "such as B and B/B" below were lost from this file (empty B<> markup); restore them from the original script.

These JSON-like files use a method of grouping data consisting of numbers,
text, booleans, lists, and dictionaries.  The underlying format is to
provide the data as a set of key/value pairs, and the pairs are grouped
as required.  Some programs, such as B and B/B, sometimes display data
in this format, but it is all collapsed into a single line.  B<jsonfmt>
will take the collapsed form of this data and format it into an
easy-to-read form.

The file given on the command line will be formatted.  Standard input is
read for data if a file is not specified.
An example of the joined-up, hard-to-read version is:

    {u'took': 2, u'timed_out': False}

The formatted version of this example is:

    {
        u'took'     : 2,
        u'timed_out': False
    }

This is easy enough to read in a simple example such as this, but it gets
much more difficult when there are 20 or 80 key/value pairs, including lists
and additional dictionaries.

This data representation is quite useful, but it does have potential
problems.  In this format, commas mark the ends of key/value lines, but not
I<all> commas mark the ends of lines and not I<all> lines are ended with
commas.  Values aren't quoted, and so checks must be performed to see what
follows any comma in a value.

The formatting actions are:

=over 4

=item * put each key/value entry on its own line

=item * put each block marker (left/right squiggly brackets and left/right
square brackets) on its own line

=item * indent lines to show block membership

=item * change each "key=value" line to have spacing between the key and
the equals sign.  The spacing is done according to the longest key name in
each indentation level, giving vertical alignment for values based on the
indentation level.

=item * a blank line is added after a block's end marker

=item * blank lines are removed -- if the I<-noblanks> option was given

=back

For example, B<jsonfmt> will convert this blob of unformatted grouped text:

    [the-answer-to-LUnE=42, foo=84, dogs=[main_dog=woofers, backup_dog=fido, emergency_backup_dog=fifi], bar=888]

into this nicely formatted text:

    [
        the-answer-to-LUnE = 42,
        foo                = 84,
        dogs               = [
            main_dog             = woofers,
            backup_dog           = fido,
            emergency_backup_dog = fifi
        ],
        bar                = 888
    ]

By default, each indentation level is represented by a single tab character.
The I<-indent> option allows this to be changed to a particular number of
spaces.

Also by default, the full structure of the JSON file will be displayed.
The I<-depth> option can be used to limit the depth of lines that will be
shown.
=head1 OPTIONS

B<jsonfmt> takes the following options:

=over 4

=item I<-depth E<lt>levelE<gt>>

Lines up to the given indentation level are displayed.

=item I<-indent E<lt>sizeE<gt>>

Set the number of spaces displayed for each indentation level.
If this option is not given, then one tab per level is used.

=item I<-noblanks>

Blank lines are removed from the output.

=item I<-showinput>

The original raw input is displayed.

=item I<-struct>

The structure of the formatted data is displayed.  Each non-blank line is
prefixed by the indentation level of the line's contents.  Bracket lines
associated with a particular data line are shown as being on the same level
as the data lines.  Blank lines are not given a level number.

=item I<-verbose>

This option doesn't do anything at the moment.

=item I<-Version>

Display the version information for B<jsonfmt>.

=item I<-help>

Display a help message.

=back

=head1 SEE ALSO

=for comment NOTE(review): the three references below were lost from this file (empty B<> markup); restore them from the original script.

B, B, B

=head1 AUTHOR

Wayne Morrison, wayne@waynemorrison.com

=head1 LICENSE

Copyright 2020 Wayne Morrison

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

=cut