Reading and Writing Files in HDFS
Reading and writing files in HDFS is much the same as in plain Java; you only need to know the HDFS-specific APIs. The main difference is that HDFS uses an org.apache.hadoop.fs.Path object rather than a java.io.File object.
The FSDataInputStream and FSDataOutputStream classes are used for reading and writing files:
FileSystem open() returns an FSDataInputStream for reading data from HDFS.
FileSystem create() returns an FSDataOutputStream for writing data to HDFS.
The FileStatus class gives the status of a file or directory: its ownership, modification time, replication, and so on.
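By default FileSystem.get(conf) connects to whatever cluster fs.defaultFS in core-site.xml points at; there is also an overload that takes a URI to name the cluster explicitly. A minimal sketch; the FsConnect class name and the namenode hostname/port are placeholders:
package com.valassis.io;
import java.io.IOException;
import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
public class FsConnect {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        // with no URI argument, FileSystem.get(conf) uses fs.defaultFS;
        // passing a URI selects a cluster explicitly
        FileSystem fs = FileSystem.get(URI.create("hdfs://namenode:8020"), conf);
        System.out.println(fs.getUri());
    }
}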
The example below reads a file from HDFS and copies it to standard output. The program takes the path of the file to read as input.
package com.valassis.io;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
public class FileRead {
    public static void main(String[] args) throws IOException {
        String url = args[0];
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        // open() returns an FSDataInputStream positioned at the start of the file
        FSDataInputStream in = fs.open(new Path(url));
        try {
            // copy the whole stream to stdout, using the buffer size from conf
            IOUtils.copyBytes(in, System.out, conf);
        } finally {
            IOUtils.closeStream(in);
        }
    }
}
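FSDataInputStream also implements Seekable, so you can jump back to any byte offset and re-read, which an ordinary java.io.InputStream cannot do. A minimal sketch (the FileReadTwice class name is just for illustration) that prints the same file twice:
package com.valassis.io;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
public class FileReadTwice {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        FSDataInputStream in = fs.open(new Path(args[0]));
        try {
            IOUtils.copyBytes(in, System.out, conf);
            in.seek(0); // rewind to the start of the file
            IOUtils.copyBytes(in, System.out, conf); // and print it again
        } finally {
            IOUtils.closeStream(in);
        }
    }
}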
The example below writes a file into HDFS. The program takes the name of the file to create as input.
package com.valassis.io;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
public class FileWrite {
    public static void main(String[] args) throws IOException {
        String url = args[0];
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        // create() overwrites an existing file at this path by default
        FSDataOutputStream out = fs.create(new Path(url));
        // write plain UTF-8 bytes; writeChars() would emit two bytes per
        // character, which looks garbled in hdfs dfs -cat
        out.write("this is the file written from fs.create".getBytes("UTF-8"));
        out.close();
    }
}
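create() also has an overload that takes a Progressable; Hadoop calls its progress() method periodically while the data is being written out, which is handy for showing progress on large uploads. A minimal sketch; the FileWriteProgress class name is illustrative:
package com.valassis.io;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.util.Progressable;
public class FileWriteProgress {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        FSDataOutputStream out = fs.create(new Path(args[0]), new Progressable() {
            @Override
            public void progress() {
                System.out.print("."); // one dot per progress callback
            }
        });
        out.write("written with a progress callback".getBytes("UTF-8"));
        out.close();
    }
}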
The example below gets the status of an HDFS file or directory. The program takes the path to query as input.
package com.valassis.io;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
public class FileStatus_Filter {
    public static void main(String[] args) throws IOException {
        Path path = new Path(args[0]);
        FileSystem fs = FileSystem.get(new Configuration());
        // listStatus() takes only a literal (relative or absolute) path, so
        //   hadoop jar FileStatus_Filter.jar hdfs://localhost.localdomain:8020/user/hdfs/output*
        // fails because no file named "output*" exists; globStatus() expands
        // the pattern and would match output, output1, and output2
        FileStatus[] fstatus = fs.globStatus(path, new PathFilter() {
            @Override
            public boolean accept(Path p) {
                // skip job .staging directories
                return !p.toString().contains(".staging");
            }
        });
        for (FileStatus tempfs : fstatus) {
            System.out.println("path: " + tempfs.getPath());
            System.out.println("access time: " + tempfs.getAccessTime());
            System.out.println("block size: " + tempfs.getBlockSize());
            System.out.println("group: " + tempfs.getGroup());
            System.out.println("length: " + tempfs.getLen());
            System.out.println("modification time: " + tempfs.getModificationTime());
            System.out.println("owner: " + tempfs.getOwner());
            System.out.println("permission: " + tempfs.getPermission());
            System.out.println("replication: " + tempfs.getReplication());
        }
    }
}
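When you want the direct children of one concrete directory rather than a glob match, listStatus() with a plain path is enough. A minimal sketch; the DirList class name is just for illustration:
package com.valassis.io;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
public class DirList {
    public static void main(String[] args) throws IOException {
        FileSystem fs = FileSystem.get(new Configuration());
        // listStatus() resolves the literal path and returns its children;
        // it does not expand glob patterns the way globStatus() does
        for (FileStatus status : fs.listStatus(new Path(args[0]))) {
            System.out.println((status.isDirectory() ? "d " : "- ") + status.getPath());
        }
    }
}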