Monday, December 7, 2015

Portals for fetching open data

[From Open Source For You - November 2015 issue]

Datahub:
http://datahub.io

World Health Organisation:
http://www.who.int/research/en  - not available

The World Bank:
http://data.worldbank.org

NASA:
http://data.nasa.gov

United States Government:
http://www.data.gov



Monday, March 3, 2014

Compressing using bz2 library in C++



For Compiling:
$ g++ code.cpp -lbz2
 

Two FILE* streams must be open:
one to read the input from, and a second to write the compressed data to.

#include <bzlib.h>

int bzerror = BZ_OK;
// arguments after fpout: blockSize100k = 9, verbosity = 0 (silent), workFactor = 30
BZFILE *bfp = BZ2_bzWriteOpen(&bzerror, fpout, 9, 0, 30);
if (bzerror != BZ_OK)
{
    // BZ2_bzWriteOpen returns NULL on failure, so there is no bzip2 handle to close
    fclose(fpin);
    fclose(fpout);
    return 1;
}

// Read raw blocks with fread so that input containing NUL bytes (or no
// newlines at all) is compressed intact; fgets + strlen would cut every
// chunk at its first NUL byte.
bool bzFailed = false;
size_t nRead = 0;
while ((nRead = fread(buf, 1, nBuf, fpin)) > 0)
{
    BZ2_bzWrite(&bzerror, bfp, buf, static_cast<int>(nRead));
    if (bzerror != BZ_OK)
    {
        if (bzerror == BZ_IO_ERROR)
            std::cout << "bz-io-error detected\n";
        else
            std::cout << "bz-write error " << bzerror << " detected\n";
        bzFailed = true;
        break;
    }
}
if (ferror(fpin))
{
    std::cout << "read error on input file\n";
    bzFailed = true;
}

// abandon = 1 tells the library not to flush further compressed data after a failure
BZ2_bzWriteClose(&bzerror, bfp, bzFailed ? 1 : 0, NULL, NULL);
if (!bzFailed && bzerror != BZ_OK)
{
    std::cout << "bz-close error " << bzerror << " detected\n";
}
// NOTE: BZ2_bzWriteClose does not close the underlying FILE*; fpin and fpout
// are still open here and must be fclose()d by the surrounding code.

// ---- end ----


BZFILE *BZ2_bzWriteOpen( int *bzerror, 
                                                   FILE *f, 
                                                   int blockSize100k
                                                   int verbosity
                                                   int workFactor );

- blockSize100k specifies the block size to be used for compression
- verbosity should be set to a number between 0 and 4 inclusive. 
  0 is silent, and greater numbers give increasingly verbose monitoring/debugging output 
- workFactor controls how the compression phase behaves when presented with worst-case, highly repetitive input data.
  If compression runs into difficulties caused by repetitive data, the library switches from the standard sorting algorithm to a fallback algorithm.
  The fallback is slower than the standard algorithm by perhaps a factor of three, but always behaves reasonably, no matter how bad the input.


=====================================

Reading Compressed file ie Extracting Data

#include <bzlib.h>

int nBuf = 512;

bzerror = BZ_OK;
while (bzerror == BZ_OK)
{
    // BZ2_bzRead returns the number of bytes it stored in buf. Use that
    // count instead of treating buf as a C string: a full read leaves no
    // room for a terminator, and decompressed data may contain NUL bytes.
    const int nRead = BZ2_bzRead(&bzerror, bfp, reinterpret_cast<char*>(buf), nBuf);
    if (bzerror != BZ_OK && bzerror != BZ_STREAM_END)
    {
        if (bzerror == BZ_IO_ERROR)
            std::cout << "bz-io-error detected\n";
        else
            std::cout << "bz-read error " << bzerror << " detected\n";
        break;
    }
    // On BZ_STREAM_END the final chunk is still valid, so it is written
    // before the loop condition ends the loop.
    std::cout.write(reinterpret_cast<const char*>(buf), nRead);
}
BZ2_bzReadClose(&bzerror, bfp);
fclose(fpin);

// ---- end ----




BZFILE *BZ2_bzReadOpen(int *bzerror, 
                       FILE *f, 
                       int verbosity, 
                       int small
                       void *unused
                       int nUnused );

- If small is 1, the library will try to decompress using less memory, at the expense of speed.
- BZ2_bzRead will decompress the nUnused bytes starting at unused before it starts reading from the file f; pass NULL and 0 if this is not needed.

Tuesday, September 11, 2012

URL Decode in C ( http query string )

/*

* to decode http query_string ie to convert special characters in
* Query String to their original form like %20 to space %22 to  
* double-quote
*
* if 'str' contains  type=it%20is%20simple%20%22text%22
* ie when called as
* http://localhost/cgi-bin/a.out?type=it is simple "text"

* when used as cgi-bin with apache

* NOTE:
* for GET method there is environment variable set by apache QUERY_STRING
* for POST method, need to read from STDIN 
*
*/ 

int urlDecode(char *str);

/*
 * Writes 'text' to stdout with the HTML metacharacters replaced by
 * entities, so that the decoded query string cannot inject markup or
 * script into the text/html response.
 */
static void printHtmlEscaped(const char *text)
{
    for (; *text != '\0'; text++)
    {
        switch (*text)
        {
            case '&':  fputs("&amp;", stdout);  break;
            case '<':  fputs("&lt;", stdout);   break;
            case '>':  fputs("&gt;", stdout);   break;
            case '"':  fputs("&quot;", stdout); break;
            case '\'': fputs("&#39;", stdout);  break;
            default:   putchar(*text);          break;
        }
    }
}

/*
 * CGI entry point: reads QUERY_STRING, URL-decodes a private copy of it
 * and echoes the result.
 * Returns 0 on success, 1 if QUERY_STRING is not set or memory runs out.
 */
int main(int argc, char *argv[])
{
    const char *query_string;
    char *decoded;
    size_t len;

    (void)argc;
    (void)argv;

    printf("Content-type: text/html\r\n\r\n"); /* required */
    query_string = getenv("QUERY_STRING");
    if (!query_string)
    {
        printf("QUERY_STRING not found\n"); return 1;
    }

    /* the string returned by getenv must not be modified: decode a copy */
    len = strlen(query_string);
    decoded = (char *)malloc(len + 1);
    if (!decoded)
    {
        printf("out of memory\n"); return 1;
    }
    memcpy(decoded, query_string, len + 1);

    urlDecode(decoded);
    printf("text received: ");
    printHtmlEscaped(decoded);
    printf(" \n");

    free(decoded);
    return 0;
}

/* Returns the value (0-15) of 'c', which must be a hexadecimal digit. */
static int hexValue(char c)
{
    if (c >= '0' && c <= '9')
        return c - '0';
    if (c >= 'a' && c <= 'f')
        return c - 'a' + 10;
    return c - 'A' + 10;
}

/*
 * Decodes %XX escapes in 'str' in place, where XX are two hexadecimal
 * digits in either case (%20 -> space, %2F -> '/').
 *
 * A '%' that is not followed by two hex digits is copied unchanged.
 * The decoded text is never longer than the input, so no temporary
 * buffer is needed and input of any length is handled.
 * Note that %00 decodes to a NUL byte, which ends the C string early.
 *
 * Returns 0 on success, -1 if 'str' is a null pointer.
 */
int urlDecode(char *str)
{
    const char *src = str;
    char *dst = str;

    if (!str)
        return -1;

    while (*src != '\0')
    {
        /* the && chain stops at the terminator, so src[2] is only read
         * when src[1] is a real character */
        if (src[0] == '%' &&
            isxdigit((unsigned char)src[1]) &&
            isxdigit((unsigned char)src[2]))
        {
            *dst++ = (char)((hexValue(src[1]) << 4) | hexValue(src[2]));
            src += 3;
        }
        else
        {
            *dst++ = *src++;
        }
    }
    *dst = '\0';
    return 0;
}

Wednesday, March 14, 2012

satellite.sh - sync favorite folders using pen-drive

#!/bin/bash
# save it as '~/bin/satellite.sh'

# label of the pen-drive; used further down to build the mount path
ZIP_DRIVE_NAME="KING1GB"

UP="UPLOAD"
DOWN="DOWNLOAD"
INVALID="usage: $0  < upload | download >"

# exactly one argument (the sync direction) is expected
if [ "$#" -ne 1 ]; then
    echo "$INVALID"
    exit 1
fi

text=""
a_string=""
a_string_common="Press Enter to continue or Ctrl+C to exit: "
arg=$1

# map the argument (word or its '>' / '<' shorthand) to a direction
# and build the matching confirmation prompt
if [[ $arg == "upload" || $arg == ">" ]]; then
    arg=$UP
    a_string="UPLOADING\nwill overwrite SATELLITE \033[35G${a_string_common}"
elif [[ $arg == "download" || $arg == "<" ]]; then
    arg=$DOWN
    a_string="DOWNLOADING\nwill overwrite BASE \033[35G${a_string_common}"
else
    arg=NONE
    echo "$INVALID"
    exit 1
fi

# show the prompt and wait for Enter
echo -ne "${a_string}"
read text

# ---- BLOCK-START ----
# add path to both arrays
# and basename of the path must be a folder

# locate the .A-BOX folder on the pen-drive: try /media/<label> first,
# then fall back to /media/<user>/<label>
SATPATH="/media/${ZIP_DRIVE_NAME}/.A-BOX"
if [ ! -d "${SATPATH}" ]; then
    SATPATH="/media/${USER}/${ZIP_DRIVE_NAME}/.A-BOX"
    if [ ! -d "${SATPATH}" ]; then
        echo "remote path '$SATPATH' does not exist, check usb"
        exit 2
    fi
fi


# entry N of path_satellite is synced with entry N of path_base
path_satellite[1]="$SATPATH/bin"
path_satellite[2]="$SATPATH/learning"
path_satellite[3]="$SATPATH/scripting"
path_satellite[4]="$SATPATH/Wallpapers"
#
path_base[1]="$HOME/bin"
path_base[2]="$HOME/Documents/learning"
path_base[3]="$HOME/Documents/turbo/scripting"
path_base[4]="$HOME/Pictures/Wallpapers"
#
# ---- BLOCK-END ----

#
# check that both arrays contain the same number of elements
#
if [ "${#path_satellite[@]}" -ne "${#path_base[@]}" ]; then
    echo -e "mismatch detected in ${0}\nkindly verify both arrays, exiting"
    exit 1
fi

total=${#path_base[@]}
echo "total: $total"

#
# check that the order of elements in both arrays is the same:
# entry N of each array must end in the same folder name
#
counter=1
while [ "$counter" -le "$total" ]
do
    # quoted so that paths containing spaces stay a single argument
    temp_sate=$(basename "${path_satellite[$counter]}")
    temp_base=$(basename "${path_base[$counter]}")

    if [ "${temp_sate}" != "${temp_base}" ]; then
        echo "satellite: ${temp_sate}"
        echo "base     : ${temp_base}"
        echo "above paths does not match in ${0}, kindly check and rerun"
        exit 1
    fi
    counter=$((counter + 1))
done

#
# check if local copy of satellite.sh is latest
#
f_remote="$SATPATH/bin/satellite.sh"
f_local="$HOME/bin/satellite.sh"

# the comparison needs both copies; without a local copy there is nothing to compare
if [ -f "${f_remote}" ] && [ -f "${f_local}" ]; then

    skip_check="no"

    # modification times as YYYYmmddHHMMSS so they compare as plain integers;
    # 'date -r FILE' avoids parsing the locale-dependent output of 'stat'
    script_remote=$(date -r "${f_remote}" +"%Y%m%d%H%M%S")
    script_local=$(date -r "${f_local}" +"%Y%m%d%H%M%S")

    cur_timestamp=$(date +"%Y%m%d%H%M%S")

    echo "remote: ${script_remote}"
    echo "local : ${script_local}"

    #
    # got this error when run 'satellite.sh upload' on ubuntu 12.04
    # followed by running 'satellite.sh download' on fedora 16
    # on fedora 16 timestamp of /media/SATELLITE/.A-BOX/satellite.sh was cur+05:30
    #
    if [ "${script_remote}" -gt "${cur_timestamp}" ]; then
        echo "remote script timestamp is set to future !!! could be time-setting problem !!!"
        echo -n "do you want to SKIP timestamp check !!! : "
        read -r choice

        if [ "$choice" = "y" ] || [ "$choice" = "Y" ]; then
            echo "skipping timestamp check"
            skip_check="yes"
        fi
    fi


    if [ "$skip_check" = "no" ] && [ "${script_remote}" -gt "${script_local}" ]; then
        # only claim the update happened if the copy really succeeded
        if ! cp -f "${f_remote}" "${f_local}"; then
            echo "failed to update local copy '${f_local}'"
            exit 1
        fi
        echo "local copy was outdated, hence updated"
        echo "re-run: ${0}"
        exit 1
    fi
fi


# rsync options kept in an array so each option stays a separate word
RSYNC_OPTS=(-av --delete --exclude=.metadata)

# sync_dir SRC DST
# Mirrors the contents of SRC into DST, creating DST first if needed.
# Returns non-zero when DST cannot be created or rsync fails.
sync_dir() {
    local src=$1
    local dst=$2

    if [ ! -d "$dst" ]; then
        mkdir -p "$dst" || return 1
    fi
    echo -e "\033[37;42;1m==> syncing\033[35G$(basename "$src")\033[m"
    # trailing slash on the source: copy its contents, not the folder itself
    rsync "${RSYNC_OPTS[@]}" "$src/" "$dst"
}

status=0

if [ "$arg" = "UPLOAD" ]; then
    for (( x=1; x<=total; x++ ))
    do
        sync_dir "${path_base[x]}" "${path_satellite[x]}" || status=1
    done

elif [ "$arg" = "DOWNLOAD" ]; then
    for (( x=1; x<=total; x++ ))
    do
        sync_dir "${path_satellite[x]}" "${path_base[x]}" || status=1
    done

else
    echo "invalid option"
    exit 1
fi

# do not report DONE when any folder failed to sync
if [ "$status" -ne 0 ]; then
    echo "one or more folders failed to sync" >&2
    exit 1
fi

echo -e "\033[;44;1m==> syncing\033[35GDONE\033[m"



#------------ END ------------



SATELLITE is the name of my pen-drive
run './satellite.sh upload' or './satellite.sh \>' to sync data to pen-drive
and './satellite.sh download' or './satellite.sh \<' to sync data on machine from pen-drive

It helps keep data in sync if you work on different machines/laptops and need to carry your personal/required folders with you all the time.

Before starting work, run 'download'; after finishing work, run 'upload' to sync back to SATELLITE.


Friday, December 9, 2011

time delay routine in C

/*
 * filename : counter.c
 * date     : 09-12-2011
 */

#include <ctype.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

char default_msg[] = "counter will expire in following seconds";

/*
 * usage: COUNTER <seconds> ["message"]
 *
 * Prints the message and the delay broken down into days and hh:mm:ss,
 * then counts down from <seconds> to 0, one step per second.
 *
 * Returns 0 when the countdown completes, -1 on invalid arguments.
 */
int main(int argc, char* argv[])
{
    char *msg = default_msg;
    int delay = 0, hour, min, sec, days;
    int i;

    days = hour = min = sec = 0;

    switch (argc)
    {
        case 3:
            msg = argv[2];
            /* fall through: the delay argument is parsed the same way */
        case 2:
            if (argv[1][0] == '\0')
            {
                printf("delay argument is empty\n");
                return -1;
            }
            for (i = 0; argv[1][i] != '\0'; i++)
            {
                int digit;

                if (!isdigit((unsigned char)argv[1][i]))
                {
                    printf("invalid digit '%c' at position %d in argument\n", argv[1][i], i);
                    return -1;
                }
                /* accumulate by hand so that overflow is detected
                 * rather than silently wrapping */
                digit = argv[1][i] - '0';
                if (delay > (INT_MAX - digit) / 10)
                {
                    printf("delay '%s' is too large\n", argv[1]);
                    return -1;
                }
                delay = delay * 10 + digit;
            }
            break;
        default:
            printf("invalid number of arguments\n"
                    "usage: %s <seconds> [\"message\"]\n", argv[0]);
            fflush(stdout);
            return -1;
    }

    /*
     * calculate exact time in hh:mm:ss entered for delay-sec
     */
    sec  = delay % 60;
    min  = (delay / 60) % 60;
    hour = (delay / 3600) % 24;
    days = delay / (3600 * 24);

    printf("MSG: %s - DAYS: %d, %d:%d:%d\n", msg, days, hour, min, sec);
    for (i = delay; i >= 0; i--)
    {
        /* left-justified fixed width overwrites stale digits when the
         * number gets shorter (e.g. 10 -> 9) */
        printf("\rcounter: %-10d", i);
        fflush(stdout);
        /* no sleep after reaching 0, so the countdown lasts 'delay' seconds */
        if (i > 0)
        {
            sleep(1);
        }
    }
    printf("\n");

    return 0;
}


/*
* compile with gcc
* gcc counter.c -o COUNTER
* ./COUNTER 15
* ./COUNTER 97405
*/

Friday, November 11, 2011

my ~/.vimrc file

" search, line-number and indentation settings
set hlsearch
set nu
set ignorecase
set expandtab
set shiftwidth=4
set softtabstop=4

if has("autocmd")
    " Autocommands live in a named group that is cleared first, so that
    " sourcing this file again does not register them a second time.
    augroup vimrc_custom
        autocmd!
        " setlocal keeps buffer/window-local options from leaking into
        " the global defaults used by other files
        autocmd BufRead *.file setlocal tw=150
        autocmd BufRead *.prvc setlocal foldmethod=indent
        autocmd BufRead *.prvc set nobackup nowritebackup fdo=insert
    augroup END

    " must stay outside the augroup block: the filetype scripts open and
    " close autocommand groups of their own
    filetype plugin indent on

    augroup vimrc_custom
        " when reopening a file, jump to the position stored in the '" mark
        au BufReadPost * if line("'\"") > 1 && line("'\"") <= line("$") | exe "normal! g'\"" | endif
    augroup END
endif


For gVim, following options can be added to "_vimrc" file
at path: C:\Users\<username>\_vimrc

" do not create swap files
set noswapfile

" use the Fixedsys:h11 font, but only in the GUI on the Win32 build
if has("gui_running") && has("gui_win32")
    set guifont=Fixedsys:h11
endif


# other options:
# gui_gtk2 / gui_macvim / gui_gtk3 / gui_win32
 



Wednesday, November 9, 2011

Create Bootable Fedora USB stick from ISO image on any Linux distro


Fedora-15-x86_64-Live-KDE.iso,
usb-stick (/dev/sdb1) 8GB having label SATELLITE (1GB is enough),
and syslinux command.

Following are the steps:
# mkdir /mnt/{iso,usb}
# mount -t iso9660 /home/user/Fedora-15-x86_64-Live-KDE.iso /mnt/iso
# mount /dev/sdb1 /mnt/usb
NOTE: /dev/sdb1 is my usb-stick
# cp -rv /mnt/iso/* /mnt/usb/
# syslinux --install -d EFI/boot/ /dev/sdb1
'-d EFI/boot/' is used because the vmlinuz and initrd image files are in this folder, relative to the root of the usb (ie /mnt/usb);
otherwise their paths need to be edited accordingly in syslinux.cfg.
# cd /mnt/usb/EFI/boot
# cp isolinux.cfg syslinux.cfg
edit syslinux.cfg and set the 'root' parameter on the kernel 'append' line to:
root=LABEL=SATELLITE
eg,
label linux0
   menu label Boot
   kernel vmlinuz0
   append initrd=initrd0.img root=LABEL=SATELLITE rootfstype=auto ro liveimg rd.luks=0 rd.md=0 rd.dm=0


Can Test it with qemu or qemu-system-x86_64:
# qemu-system-x86_64 -hda /dev/sdb1 -m 256 -vga std

If it boots successfully, just reboot the system and boot it from the usb.

NOTE: label can be set using command: e2label /dev/sdb1 SATELLITE